In [1]:
import vincent
import pandas as pd
from vincent import AxisProperties, PropertySet, ValueRef
from vincent import Map
# Initialise vincent's notebook support up front, ahead of the chart cells below.
vincent.core.initialize_notebook()
In [2]:
# Incident records are read from a CSV given by a relative path, so the file
# has to be reachable from the kernel's working directory.
INCIDENTS_CSV = 'sanfrancisco_incidents_summer_2014.csv'
incidents = pd.read_csv(INCIDENTS_CSV)
Replace the data set's column labels with the names used throughout the rest of this notebook.
In [3]:
# New labels for the incident frame. The assignment is positional, so this
# list must have exactly one entry per existing column, in column order.
column_labels = [
    'Id',
    'Category',
    'Description',
    'DayOfWeek',
    'Date',
    'Time',
    'District',
    'Resolution',
    'Address',
    'Longitude',
    'Latitude',
    'Location',
    'PdId',
]
incidents.columns = column_labels
The date and time of each incident are stored in two separate columns. Combine them into a single DateTime column.
In [4]:
# Date and Time arrive as two separate text columns: join them with a space
# and parse the result into one timestamp column.
# NOTE: 'Date' is overwritten with a datetime below, so this cell cannot be
# re-run on the same frame without reloading the CSV first.
incidents['DateTime'] = pd.to_datetime(incidents['Date'] + ' ' + incidents['Time'])

# Derive the calendar parts through the vectorised .dt accessor rather than a
# separate DatetimeIndex and a round trip through Python date objects.
timestamps = incidents['DateTime'].dt
incidents['Date'] = timestamps.normalize()  # same day, time set to midnight
incidents['Hour'] = timestamps.hour
incidents['Year'] = timestamps.year
incidents['Month'] = timestamps.month
incidents['Weekday'] = timestamps.weekday  # pandas numbering: Monday=0 ... Sunday=6
In [5]:
# Number of incidents per category over the whole data set, sorted ascending
# by count so the chart's bars run from the rarest to the most common category.
count_by_category = (
    incidents.groupby('Category')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=True)
)

graph = vincent.Bar(count_by_category, columns=['count'], key_on='Category')
graph.legend(title='Category')
graph.axis_titles(x='Category', y='Incident Count')
# Turn the tick labels of the first axis to 270 degrees, right-aligned
# (axes[0] is presumably the x axis, where the category names sit - verify).
graph.axes[0].properties = AxisProperties(
    labels=PropertySet(
        angle=ValueRef(value=270),
        align=ValueRef(value='right'),
    )
)
graph.display()
In [6]:
# Count of incident Ids with one row per Weekday and one column per District.
# NOTE(review): despite its name, by_year is indexed by Weekday, not by year;
# the name is kept because the following cell reads it.
by_year = incidents.pivot_table(
    values='Id',
    index='Weekday',
    columns='District',
    aggfunc='count',
)
In [7]:
# Line chart of the weekday-by-district counts held in by_year; the legend is
# titled after the District columns of that table.
weekday_chart = vincent.Line(by_year)
weekday_chart.legend(title='District')
weekday_chart.axis_titles(x='Weekday', y='Incident Count')
weekday_chart.display()
In [8]:
# Restrict the data to the MISSION district and leave out four categories.
# The result stays in `filtered` because the next cell charts it.
excluded_categories = [
    'LARCENY/THEFT',
    'NON-CRIMINAL',
    'OTHER OFFENSES',
    'WARRANTS',
]
in_mission = incidents['District'] == 'MISSION'
is_excluded = incidents['Category'].isin(excluded_categories)
filtered = incidents[in_mission & ~is_excluded]
In [9]:
# Number of incidents per category within the current `filtered` selection,
# sorted ascending by count so the bars run from rarest to most common.
count_by_category = (
    filtered.groupby('Category')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=True)
)

graph = vincent.Bar(count_by_category, columns=['count'], key_on='Category')
graph.legend(title='Category')
graph.axis_titles(x='Category', y='Incident Count')
# Turn the tick labels of the first axis to 270 degrees, right-aligned
# (axes[0] is presumably the x axis, where the category names sit - verify).
graph.axes[0].properties = AxisProperties(
    labels=PropertySet(
        angle=ValueRef(value=270),
        align=ValueRef(value='right'),
    )
)
graph.display()
In [10]:
# Category that the following cells select from `incidents` before charting.
filter_by_category = 'MISSING PERSON'
In [11]:
# Keep only the incidents of the selected category. `filtered` is left in
# place because the next cell groups it by district.
filtered = incidents.loc[incidents['Category'] == filter_by_category]

# Count of incident Ids per Hour (rows) and District (columns).
by_hour = filtered.pivot_table(
    values='Id',
    index='Hour',
    columns='District',
    aggfunc='count',
)

hourly_chart = vincent.Line(by_hour)
hourly_chart.legend(title='District')
hourly_chart.axis_titles(x='Hour', y='Incident Count')
hourly_chart.display()
In [12]:
# Number of incidents per district within the current `filtered` selection,
# sorted ascending by count. Named for what it holds (districts) instead of
# reusing the per-category name.
count_by_district = (
    filtered.groupby('District')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=True)
)

graph = vincent.Bar(count_by_district, columns=['count'], key_on='District')
graph.legend(title='District')
graph.axis_titles(x='District', y='Incident Count')
# Turn the tick labels of the first axis to 270 degrees, right-aligned
# (axes[0] is presumably the x axis, where the district names sit - verify).
graph.axes[0].properties = AxisProperties(
    labels=PropertySet(
        angle=ValueRef(value=270),
        align=ValueRef(value='right'),
    )
)
graph.display()
In [13]:
# Districts that the next cell keeps when comparing hourly incident counts.
filter_by_districts = ['MISSION','SOUTHERN','PARK','BAYVIEW']
In [14]:
# Select incidents that match the chosen category and fall in one of the
# chosen districts.
is_selected_category = incidents['Category'] == filter_by_category
in_selected_districts = incidents['District'].isin(filter_by_districts)
filtered = incidents[is_selected_category & in_selected_districts]

# Count of incident Ids per Hour (rows) and District (columns).
by_hour = filtered.pivot_table(
    values='Id',
    index='Hour',
    columns='District',
    aggfunc='count',
)

district_hour_chart = vincent.Line(by_hour)
district_hour_chart.legend(title='District')
district_hour_chart.axis_titles(x='Hour', y='Incident Count')
district_hour_chart.display()
In [15]:
# Focus on a single district/category pair. `filtered` is the input of the
# chart cells that follow.
filter_by_district = 'MISSION'
filter_by_category = 'MISSING PERSON'

matches_category = incidents['Category'] == filter_by_category
matches_district = incidents['District'] == filter_by_district
filtered = incidents[matches_category & matches_district]
In [16]:
# Count of incident Ids per Hour (rows) and Category (columns) for the current
# `filtered` selection, drawn as a line chart.
by_hour = filtered.pivot_table(
    values='Id',
    index='Hour',
    columns='Category',
    aggfunc='count',
)

category_hour_chart = vincent.Line(by_hour)
category_hour_chart.legend(title='Category')
category_hour_chart.axis_titles(x='Hour', y='Incident Count')
category_hour_chart.display()
In [17]:
# Count of incident Ids per Hour (rows) and District (columns) for the current
# `filtered` selection, drawn as a bar chart.
by_hour = filtered.pivot_table(
    values='Id',
    index='Hour',
    columns='District',
    aggfunc='count',
)

hour_bar_chart = vincent.Bar(by_hour)
hour_bar_chart.legend(title='District')
hour_bar_chart.axis_titles(x='Hour', y='Incident Count')
hour_bar_chart.display()
In [18]:
# Count of incident Ids per Weekday (rows) and District (columns) for the
# current `filtered` selection, drawn as a bar chart.
by_weekday = filtered.pivot_table(
    values='Id',
    index='Weekday',
    columns='District',
    aggfunc='count',
)

weekday_bar_chart = vincent.Bar(by_weekday)
weekday_bar_chart.legend(title='District')
weekday_bar_chart.axis_titles(x='Weekday', y='Incident Count')
weekday_bar_chart.display()
In [19]:
# Weekday was taken from pandas' weekday attribute (Monday=0 ... Sunday=6),
# so 4 selects Friday.
FRIDAY = 4

# Narrow into a new frame instead of overwriting `filtered`: the preceding
# chart cells read `filtered`, and re-running them after this cell must not
# silently switch them to Friday-only data.
filtered_friday = filtered[filtered['Weekday'] == FRIDAY]

# Count of incident Ids per Hour (rows) and District (columns), Fridays only.
by_hour = filtered_friday.pivot_table(
    values='Id',
    index='Hour',
    columns='District',
    aggfunc='count',
)

graph = vincent.Bar(by_hour)
graph.legend(title='District')
graph.axis_titles(x='Hour', y='Incident Count')
graph.display()
In [ ]: